#coding=utf-8
'''
Created on 2012-4-20

@author: michaelh0226
'''
from scrapy.spider import BaseSpider 
from scrapy.selector import HtmlXPathSelector 

class GanJiSpider(BaseSpider):
    '''Spider that prints the title links found on a sh.ganji.com list page.

    The crawl starts from the single URL in ``start_urls``; ``parse`` is the
    callback that handles its response.
    '''

    name = "ganji.com"
    # Must cover the host used in start_urls: Scrapy's off-site filtering
    # drops follow-up requests to domains that are not listed here
    # (sub-domains such as sh.ganji.com are matched by "ganji.com").
    allowed_domains = ["ganji.com"]
    start_urls = [
        "http://sh.ganji.com/fang3/pudongxinqu/a1/",
    ]

    def parse(self, response):
        '''Print the title anchor(s) of every entry in the page's list.

        Args:
            response: the downloaded page; it is wrapped in an
                HtmlXPathSelector so it can be queried with XPath.

        Returns:
            None. Nothing is yielded; for each ``<dl>`` entry the list
            returned by ``extract()`` is written to stdout.
        '''
        hxs = HtmlXPathSelector(response)
        # Each <dl> under the second child <div> of #content (which must
        # also carry class "list") is treated as one entry.
        entries = hxs.select("//div[@id='content']/div[2][@class='list']/dl")
        for entry in entries:
            title = entry.select("dt/a[@class='list_title']").extract()
            # Parenthesised single-argument form prints the same text on
            # Python 2 and is also valid syntax on Python 3.
            print(title)

